Introduction

This study looked at individual differences in cognition and behavior of different species of lemurs. Temperament was measured by their reswponses to novel objects. The variables I used throughout my example include:

Trial_Type: baseline (table only), person (person @ table), static (stationary novel object), and moving (moving novel object)

LatencyProximity: latency to approach proximity area at any point (seconds)

DurationInProximity: amount of time spent in the proximity (seconds)



Summary Table

A summary table of characteristics separated into the trial type it was exposed to. Latency proximity was organized into groups of 5 due to the length of the observations.

lemur <- read_excel("Cantwell_et_al_Lemurs_Novel_Object_Task.xlsx")

lemur$LatencyProximity <- as.numeric(lemur$LatencyProximity)
lemur$LPS <- cut(
                 lemur$LatencyProximity,
                 breaks = seq(0, max(lemur$LatencyProximity, na.rm = TRUE) + 5, by = 5),
                 right = FALSE,  
                 labels = paste(seq(0, max(lemur$LatencyProximity, na.rm = TRUE), by = 5),
                 seq(4.99, max(lemur$LatencyProximity, na.rm = TRUE) + 4.99, by = 5),
                 sep = "-")
                )

head(lemur)
## # A tibble: 6 × 10
##   CodeID Species   Sex     Age Trial_Num Trial_Type DurationInProximity Approach
##    <dbl> <chr>     <chr> <dbl>     <dbl> <chr>                    <dbl>    <dbl>
## 1      1 Ring-tai… F      14.3         1 Baseline                  0           0
## 2      1 Ring-tai… F      14.3         2 Person                   11.4         1
## 3      1 Ring-tai… F      14.3         3 Static                   11.1         1
## 4      1 Ring-tai… F      14.3         4 Moving                    0           0
## 5      2 Ruffed l… M       6.2         1 Baseline                 10.6         1
## 6      2 Ruffed l… M       6.2         2 Person                    5.37        1
## # ℹ 2 more variables: LatencyProximity <dbl>, LPS <fct>
tbl_summary(lemur, by = Trial_Type, missing = "no")
Characteristic Baseline
N = 65
1
Moving
N = 65
1
Person
N = 65
1
Static
N = 65
1
CodeID 33 (17, 49) 33 (17, 49) 33 (17, 49) 33 (17, 49)
Species



    BlueEyed Black lemurs 11 (17%) 11 (17%) 11 (17%) 11 (17%)
    Collared lemurs 4 (6.2%) 4 (6.2%) 4 (6.2%) 4 (6.2%)
    Crowned lemurs 9 (14%) 9 (14%) 9 (14%) 9 (14%)
    Mongoose lemurs 10 (15%) 10 (15%) 10 (15%) 10 (15%)
    Ring-tailed lemurs 10 (15%) 10 (15%) 10 (15%) 10 (15%)
    Ruffed lemurs 10 (15%) 10 (15%) 10 (15%) 10 (15%)
    Sifakas 11 (17%) 11 (17%) 11 (17%) 11 (17%)
Sex



    F 28 (43%) 28 (43%) 28 (43%) 28 (43%)
    M 37 (57%) 37 (57%) 37 (57%) 37 (57%)
Age 6 (4, 14) 6 (4, 14) 6 (4, 14) 6 (4, 14)
Trial_Num



    1 65 (100%) 0 (0%) 0 (0%) 0 (0%)
    2 0 (0%) 0 (0%) 65 (100%) 0 (0%)
    3 0 (0%) 0 (0%) 0 (0%) 65 (100%)
    4 0 (0%) 65 (100%) 0 (0%) 0 (0%)
DurationInProximity 4 (0, 16) 0 (0, 16) 7 (0, 23) 15 (6, 34)
Approach 37 (57%) 34 (52%) 38 (58%) 59 (91%)
LatencyProximity 17 (8, 25) 9 (6, 20) 13 (6, 26) 7 (5, 12)
LPS



    0-4.99 0 (0%) 5 (15%) 5 (13%) 10 (17%)
    5-9.99 12 (32%) 13 (38%) 10 (26%) 30 (51%)
    10-14.99 4 (11%) 4 (12%) 7 (18%) 9 (15%)
    15-19.99 7 (19%) 4 (12%) 3 (7.9%) 2 (3.4%)
    20-24.99 4 (11%) 0 (0%) 3 (7.9%) 1 (1.7%)
    25-29.99 3 (8.1%) 0 (0%) 3 (7.9%) 4 (6.8%)
    30-34.99 3 (8.1%) 1 (2.9%) 1 (2.6%) 1 (1.7%)
    35-39.99 1 (2.7%) 2 (5.9%) 1 (2.6%) 0 (0%)
    40-44.99 0 (0%) 4 (12%) 3 (7.9%) 0 (0%)
    45-49.99 2 (5.4%) 0 (0%) 0 (0%) 1 (1.7%)
    50-54.99 1 (2.7%) 1 (2.9%) 2 (5.3%) 0 (0%)
    55-59.99 0 (0%) 0 (0%) 0 (0%) 1 (1.7%)
1 Median (Q1, Q3); n (%)





Now I am running a basic summary of Ring-tailed lemurs and their latency proximity and duration in proximity. This table is displaying medians, which is not what we want, so let’s create a table looking at means and standard deviation.

rtl <- subset(lemur, lemur$Species == "Ring-tailed lemurs")

rtl1 <- rtl %>%
        tbl_summary(
        by = Sex,
        missing = "no", #gets rid of pesky NAs
        include = c(DurationInProximity, LPS)
        )
rtl1
Characteristic F
N = 24
1
M
N = 16
1
DurationInProximity 0 (0, 11) 0 (0, 13)
LPS

    0-4.99 2 (20%) 1 (17%)
    5-9.99 4 (40%) 2 (33%)
    10-14.99 2 (20%) 1 (17%)
    15-19.99 0 (0%) 1 (17%)
    20-24.99 0 (0%) 0 (0%)
    25-29.99 1 (10%) 0 (0%)
    30-34.99 0 (0%) 0 (0%)
    35-39.99 0 (0%) 0 (0%)
    40-44.99 0 (0%) 0 (0%)
    45-49.99 1 (10%) 0 (0%)
    50-54.99 0 (0%) 0 (0%)
    55-59.99 0 (0%) 1 (17%)
1 Median (Q1, Q3); n (%)
theme_gtsummary_mean_sd(set_theme = TRUE)





T-Test

I ran the same code, just added a theme to ensure the table was using the means and standard deviation. Now let’s clean this up a bit!

cl1 <- lemur %>%
       filter(Species == "Crowned lemurs") %>%
       select(
              Sex,
              LatencyProximity,
              DurationInProximity
       ) %>%
       tbl_summary(
       by = Sex,
       missing = "no",
       type = all_continuous() ~ "continuous",
       digits = all_continuous() ~ 1,
       statistic = list(all_continuous() ~ "{mean} ({sd})", #can use any character for parentheses
                     all_categorical() ~ "{n}"), #count obs. for categorical variables
       label = list(
                    LatencyProximity ~ "LP (s)",
                    DurationInProximity ~ "DIP (s)"))
                          

cl1
Characteristic F
N = 12
1
M
N = 24
1
LP (s) 18.5 (13.8) 8.4 (3.5)
DIP (s) 12.0 (12.1) 11.0 (14.6)
1 Mean (SD)
cl1 <- cl1 %>%
       add_p(test = all_continuous() ~ "t.test",
             pvalue_fun = ~ style_pvalue(.x, digits = 2)) %>%
       modify_caption("Table 1. *Crowned Lemur with different timed proximities*") %>%
       modify_fmt_fun(statistic ~ style_sigfig) %>% 
       modify_header(
         update = list(
                       label ~ '',
                       stat_1 ~ '**Female**',
                       stat_2 ~ '**Male**',
                       statistic ~ "**t-value**",
                       p.value ~ '**P-value**'
                     )
       )
       

cl1
Table 1. Crowned Lemur with different timed proximities
Female1 Male1 t-value2 P-value2
LP (s) 18.5 (13.8) 8.4 (3.5) 2.4 0.038
DIP (s) 12.0 (12.1) 11.0 (14.6) 0.21 0.84
1 Mean (SD)
2 Welch Two Sample t-test
show_header_names(cl1)
## Column Name   Header          level*    N*         n*         p*             
## label         ""                        36 <int>                             
## stat_1        "**Female**"    F <chr>   36 <int>   12 <int>   0.333 <dbl>    
## stat_2        "**Male**"      M <chr>   36 <int>   24 <int>   0.667 <dbl>    
## statistic     "**t-value**"             36 <int>                             
## p.value       "**P-value**"             36 <int>
## * These values may be dynamically placed into headers (and other locations).
## ℹ Review the `modify_header()` (`?gtsummary::modify_header()`) help for
##   examples.

Ah! Much better! Now I have a table that shows mean and SD along with the t-value and p-value.





ANOVA

Now let’s run an ANOVA and compare number of males and females, latency proximity, and duration in proximity.

anova1 <- lemur %>%
          select(Species, Sex, LatencyProximity, DurationInProximity) %>% 
          filter(Species %in% c("Sifakas", "Ruffed lemurs", "Mongoose lemurs")) %>%
          tbl_summary(
          by = Species,
          missing = "no",
          digits = all_continuous() ~ 1,
          label = list(
                       Sex ~ "Sex",
                       LatencyProximity ~ "LP (s)",
                       DurationInProximity ~ "DP (s)"
                      ),
          statistic = list(all_continuous() ~ "{mean} ({sd})",
                     all_categorical() ~ "{n}") 
                      ) %>%
          add_p(pvalue_fun = ~ style_pvalue(.x, digits = 2)) %>% 
          modify_header(
          update = list(
                        label ~ '',
                        stat_1 ~ '*Sifakas*',
                        stat_2 ~ '*Ruffed Lemurs*',
                        stat_3 ~ '*Mongoose Lemurs*',
                        p.value ~ 'P-value'
                       )
               )

anova1
Sifakas1 Ruffed Lemurs1 Mongoose Lemurs1 P-value2
Sex


0.003
    F 12 8 24
    M 28 32 20
LP (s) 15.9 (13.2) 15.0 (12.0) 17.1 (14.2) 0.80
DP (s) 11.9 (14.4) 22.8 (18.7) 27.1 (20.0) <0.001
1 n; Mean (SD)
2 Pearson’s Chi-squared test; One-way analysis of means (not assuming equal variances)
anova1 %>%
      as_gt() %>% 
      opt_stylize(style = 6, color = 'red') %>%
      tab_header(title = md("**Lemur Characteristics**")) %>%
      opt_align_table_header(align = "left") %>%
      tab_options(heading.align = "left")
Lemur Characteristics
Sifakas1 Ruffed Lemurs1 Mongoose Lemurs1 P-value2
Sex


0.003
    F 12 8 24
    M 28 32 20
LP (s) 15.9 (13.2) 15.0 (12.0) 17.1 (14.2) 0.80
DP (s) 11.9 (14.4) 22.8 (18.7) 27.1 (20.0) <0.001
1 n; Mean (SD)
2 Pearson’s Chi-squared test; One-way analysis of means (not assuming equal variances)





ANCOVA

Finally, let’s run an ANCOVA looking at Blue Eyed lemurs and differences between males and females after accounting for age. I then ran a t-test to compare the means between age and sex. Lastly, I ran a summary to observe the linear model between age and sex on latency as an additive model.

bebl <- lemur %>%
        filter(Species == "BlueEyed Black lemurs")

bebl1 <- bebl %>% 
         select(Sex, LatencyProximity, DurationInProximity, Age)%>%
         tbl_summary(by=Sex, 
              digits = all_continuous() ~ 1,
              missing = "no",
              include=c(LatencyProximity, DurationInProximity)) %>%
         add_difference(
              adj.vars = c(Age),
         pvalue_fun = ~style_pvalue(.x, digits = 2) 
         )

bebl1
Characteristic F
N = 24
1
M
N = 20
1
Adjusted Difference2 95% CI2 p-value2
LatencyProximity 18.1 (15.8) 10.0 (9.9) 8.7 -4.2, 22 0.18
DurationInProximity 6.8 (7.5) 10.9 (14.9) -2.6 -9.4, 4.2 0.44
1 Mean (SD)
2 ANCOVA
Abbreviation: CI = Confidence Interval
t.test(Age~Sex, data=bebl)
## 
##  Welch Two Sample t-test
## 
## data:  Age by Sex
## t = -1.3092, df = 41.938, p-value = 0.1976
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -7.907416  1.684750
## sample estimates:
## mean in group F mean in group M 
##        8.096667       11.208000
bebl2 <- lm(LatencyProximity ~ Age + Sex, data=bebl)

summary(bebl2)
## 
## Call:
## lm(formula = LatencyProximity ~ Age + Sex, data = bebl)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.781 -10.779  -5.124   8.822  27.459 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 17.28972    5.06283   3.415  0.00237 **
## Age          0.08807    0.38809   0.227  0.82248   
## SexM        -8.73461    6.27494  -1.392  0.17724   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.19 on 23 degrees of freedom
##   (18 observations deleted due to missingness)
## Multiple R-squared:  0.08297,    Adjusted R-squared:  0.003228 
## F-statistic:  1.04 on 2 and 23 DF,  p-value: 0.3693



Now let’s visualize a model that observes the relationship between age and latency proximity when looking at sex.

ggplot(bebl2) +
  geom_point(aes(x = Age, y = LatencyProximity, color = Sex), size = 3, alpha = 0.8) +
  geom_line(aes(x = Age, y = .fitted, color = Sex), linewidth = 1) +
  geom_vline(xintercept=mean(bebl$Age), linetype='dashed', color='black', size=0.5) +
  scale_colour_manual(values = c("F" = "red", "M" = "gray24")) +
  labs(
    x = "Age",
    y = "Latency Proximity (s)",
    color = "Sex"
  ) +
  theme_classic(base_size = 14) +  # removes gray background and gridlines, increases base text size
  theme(
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 14),
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 13),
    plot.background = element_rect(fill = "white", color = NA)
  )